*---------------------------------------------------------------------
* Session setup: pin the interpreter version, close any log left open,
* start from an empty dataset, and log this run next to the data.
version 10
capture log close
capture clear
set more off
cd "E:\REStat_MS14767_Vol96(2)\Data preparation Compustat"
log using "5_raw_rjv_panel_america.log", replace
*-----------------------------------------------------------------

****************************************
* This short file drops non-American firms
* from the RJV data set, as well as observations from 1985
* and entries without a ticker.
****************************************


*************
* Generate the inside/outside dummy.
* ins is 1 when eyear <= year <= xyear and 0 otherwise
* (eyear / xyear are presumably the RJV entry and exit years - confirm
* against the codebook of raw_rjv_panel.dta).
* Stata orders missing values above every number, so a missing xyear
* never switches ins back to 0, and a missing eyear never switches it on
* for a non-missing year.
*************

use "raw_rjv_panel.dta", clear
generate ins = (year >= eyear) & (year <= xyear)

*************
* Remove all observations from 1985
* (no COMPUSTAT data are available for that year).
*************

* Number of observations about to be removed (original run: 14016)
count if year == 1985
keep if year != 1985


*************
* Remove all firms without a ticker
* (they cannot be used in the market share analysis).
*************

* Number of observations about to be removed (original run: 112560)
count if ticker == ""

keep if ticker != ""

* Match the panel to "ticker_country.dta" on ticker and year.
* This is the old-style (version 10) merge syntax: the data in memory are
* sorted here; the using file must already be sorted by ticker year.
sort ticker year
merge ticker year using "ticker_country.dta"
tabulate _merge

*****************************************************************
* Result recorded in the original run:
*
*      _merge |      Freq.     Percent        Cum.
* ------------+-----------------------------------
*           1 |        420        0.10        0.10
*           2 |    317,968       79.17       79.27
*           3 |     83,244       20.73      100.00
* ------------+-----------------------------------
*       Total |    401,632      100.00
*
*
* _merge==1 : these are the 9 firms that appear in the RJV database with
* a ticker but have no counterpart in Compustat.
*************

* Keep master-only and matched rows; rows found only in the using file
* (_merge==2) carry no RJV information.
keep if _merge != 2
drop _merge

*************
* Keep American firms only, i.e. observations with countryinc equal to 0.
* Observations whose countryinc is missing are removed as well, because a
* missing value is not equal to 0 (presumably this covers the rows left
* unmatched by the merge on ticker and year - confirm).
*************

* Number of observations about to be removed (original run: 14182)
count if countryinc != 0

keep if countryinc == 0


*-*-*-*-*-*-* NOTE - DATA CORRECTION!!! *-*-*-*-*-*-*-*-*-*

*******
* For each entityname (connected to the TICKER) there might be several
* entrynames.
* We assume that the "mother" firm is in the RJV if at least one of its
* entrynames is in that RJV, and we then keep only one observation per
* entityname (comnum), RJV (rjvnum) and year.
*
* Step 1: ins becomes the group maximum, so it is 1 as soon as any
*         entry of the group is inside the RJV.
* Step 2: drop all but one observation per (comnum, rjvnum, year).
*
* FIX: duplicates used to be detected by comparing each row with the
* previous one (rjvnum==rjvnum[_n-1] & ...), which only works when the
* data are sorted by comnum rjvnum year. At this point the data are
* sorted by ticker year, and Stata's sort does not keep a fixed order
* among ties, so duplicates were not guaranteed to be adjacent and some
* could survive. The groups are now formed explicitly with bysort.
* The original log reported 1640 duplicates under the adjacency check;
* the count below (and the final number of observations) may therefore
* differ from the figures recorded in earlier runs.
*************

egen ins2 = max(ins), by(comnum rjvnum year)
drop ins
rename ins2 ins

* Tag every observation after the first within each group.
bysort comnum rjvnum year: generate byte dupobs = _n > 1
count if dupobs
drop if dupobs
drop dupobs

* Restore the ticker-year ordering the data had before this step, so
* that later steps see the same sort key as before.
sort ticker year

*-*-*-*-*-*-*-*-*-**-*-*-*-*-*-*-*-*-**-*-*-*-*-*-*-*-*-*

* Summarise the final panel, write it to disk and close the log.
describe

* Dimensions recorded in the original run:
* obs:        67,842
* vars:           16

save "raw_rjv_panel_america.dta", replace
log close
